# PAPER TITLE: The Dynamics of Racial Resentment Across the 50 U.S. States
# AUTHORS: Rebecca Kreitzer, Candis Smith, Feiya Suo
# EDITOR OF R DOCUMENTS: Feiya Suo
# FINAL EDIT DATE: July 2nd, 2019

# Load packages
# The global workspace is deliberately left untouched: every object used in
# this script is assigned before it is read, so clearing the caller's session
# with rm(list = ls()) is unnecessary and would destroy unrelated work.
library("arm")
library("foreign")
library("lme4")

# Set the working directory
# The input files are looked up relative to the working directory. The
# directory below is only entered when it exists, so the script can also be
# run on another machine from whichever folder holds the two CSV files.
project_dir <- "/Users/feiyasuo/Documents/RA/racial/final"
if (dir.exists(project_dir)) {
  setwd(project_dir)
} else {
  message("Directory '", project_dir, "' not found; using ", getwd())
}

# Read data
# opinion.data: individual-level records used to fit the regression.
# census: records for which predictions are generated and weighted.
opinion.data <- read.csv("anes_final.csv")
census <- read.csv("nber_final.csv")

# Individual-level regression
# NOTE: the model below is fitted with lm(), i.e. it is an ordinary linear
# model rather than a multilevel model.

# Treat the categorical predictors as factors.
factor_cols <- c("age_group", "race", "highest_education", "gender")
for (col in factor_cols) {
  opinion.data[[col]] <- as.factor(opinion.data[[col]])
}

# Rescale the outcome by dividing it by 20.
opinion.data$racial_index <- opinion.data$racial_index / 20

# The order of the terms is kept as is: several of them are linearly
# dependent (race, gender and the race-by-gender indicators), and the order
# determines which coefficients lm() reports as aliased.
individual.model <- lm(
  racial_index ~ age_group + highest_education + race + gender +
    white_female + black_male + black_female + other_male +
    other_female + white_male + state + year + ideology,
  data = opinion.data
)
summary(individual.model)

# Create a prediction for each cell in Census data
# The categorical columns are converted to factors so that they are handled
# the same way as in the data the model was fitted on.
census_factor_cols <- c("age_group", "race", "highest_education", "gender")
census[census_factor_cols] <- lapply(census[census_factor_cols], as.factor)

# Predicted value of the fitted model for every census row.
census$cellpred <- predict(individual.model, newdata = census)

# Calculate the weight for each type
# A "type" is identified by a key: the space-separated concatenation of the
# columns below. Year and state are later recovered from fixed character
# positions of that key (1-4 and 6-7), which relies on `year` being four
# characters and `state` two characters wide.
census$type <- paste(
  census$year, census$state, census$age_group, census$highest_education,
  census$age_education, census$white_male, census$white_female,
  census$black_male, census$black_female, census$other_male,
  census$other_female
)
census$year_state <- paste(census$year, census$state)

# One row per type: `n.Var1` holds the key, `n.Freq` the number of census
# rows that carry it.
raw_result <- data.frame(
  n = table(census$type), year = NA, state = NA, score = NA
)
# Number of census rows per state-year (`Var1` = key, `Freq` = count).
year_state <- as.data.frame(table(census$year_state))

raw_result$year <- substring(raw_result$n.Var1, 1, 4)
raw_result$state <- substring(raw_result$n.Var1, 6, 7)

# Prediction for each type: the value from the first census row with that
# key. match() returns exactly that first position, so this is a vectorised
# replacement for looping over the rows and comparing keys one at a time.
first_row <- match(as.character(raw_result$n.Var1), census$type)
raw_result$score <- census$cellpred[first_row]

raw_result$year_state <- as.factor(substring(raw_result$n.Var1, 1, 7))

# Keys that were built from missing values are turned into NA so that they
# do not take part in the lookup of state-year totals.
raw_result$year_state <- droplevels(raw_result$year_state,
                                    exclude = "NA NA N")
year_state$Var1 <- droplevels(year_state$Var1, exclude = "NA NA")

# Look up the state-year total for every type. Rows whose state-year key is
# NA keep an NA total; any other key without a counterpart indicates that
# the fixed-width assumption above is violated, so stop instead of silently
# producing NA weights.
total_row <- match(as.character(raw_result$year_state),
                   as.character(year_state$Var1))
total_row[is.na(raw_result$year_state)] <- NA
if (any(is.na(total_row) & !is.na(raw_result$year_state))) {
  stop("some type keys have no matching state-year total", call. = FALSE)
}
raw_result$total <- year_state$Freq[total_row]

# Share of the state-year's census rows that belong to each type.
raw_result$weight <- raw_result$n.Freq / raw_result$total

# Weight the predicted score
raw_result$weightedpred <- raw_result$weight * raw_result$score

# Get the final result
# Sum the weighted predictions within every state and year. Doing this in a
# single tapply() call keeps the years aligned by state name: a state that
# has no rows in some year gets NA for that year instead of shifting the
# values of the other states. The state labels come from the row names of
# the table, so they do not depend on `state` having been stored as a factor.
survey_years <- c("1988", "1990", "1992", "1994", "2000",
                  "2004", "2008", "2012", "2016")
in_scope <- raw_result$year %in% survey_years

state_by_year <- tapply(
  raw_result$weightedpred[in_scope],
  list(
    state = raw_result$state[in_scope],
    year = factor(raw_result$year[in_scope], levels = survey_years)
  ),
  sum
)

# One row per state (sorted), one `year_<YYYY>` column per survey year.
final_result <- data.frame(state = rownames(state_by_year),
                           stringsAsFactors = FALSE)
for (yr in survey_years) {
  final_result[[paste0("year_", yr)]] <- as.vector(state_by_year[, yr])
}

# Save Final Results
write.csv(final_result, "final_result.csv")
